"""Various utils used across the pipeline herder."""
from collections import abc
import dataclasses
import datetime
import enum
from functools import cached_property
from functools import lru_cache
from importlib import resources
import json
import os
import pathlib
import re
from tempfile import NamedTemporaryFile
import typing

from cki_lib import chatbot
from cki_lib import gitlab
from cki_lib import misc
from cki_lib import yaml
from cki_lib.kcidb.file import KCIDBFile
from cki_lib.logger import get_logger
from cki_lib.session import get_session

# Module-wide logger and shared HTTP session; raise_for_status makes failed
# HTTP requests raise instead of returning error responses silently.
LOGGER = get_logger(__name__)
SESSION = get_session(__name__, raise_for_status=True)


def _load_config() -> dict[typing.Any, typing.Any]:
    """Load and validate the herder configuration from the environment.

    The configuration is taken either directly from PIPELINE_HERDER_CONFIG
    or from the file named by PIPELINE_HERDER_CONFIG_PATH, and is validated
    against the packaged schema.
    """
    schema = resources.files(__package__) / 'schema.yml'
    inline_config = os.environ.get('PIPELINE_HERDER_CONFIG')
    config_path = os.environ.get('PIPELINE_HERDER_CONFIG_PATH')
    return yaml.load(contents=inline_config, file_path=config_path, schema_path=schema)


# Matcher configuration tree, loaded once at import time.
CONFIG = _load_config()


class RetryReason(enum.Enum):
    """Reason why a retry should (not) be performed."""

    def __init__(self, message: str, notify_chat: bool = False, retry: bool = False) -> None:
        """Create a new reason.

        The enum member values below are unpacked as the arguments of this
        initializer: (message, notify_chat, retry).
        """
        self.message = message
        self.notify_chat = notify_chat
        self.retry = retry

    # retry the job and notify the chat about it
    RETRY_OK = ('everything fine', True, True)
    # do not retry, but notify the chat that the limit was reached
    RETRY_LIMIT_REACHED = ('retry limit reached', True)
    # do not retry and stay quiet: somebody else already restarted the job
    EXTERNALLY_RESTARTED = 'job restarted externally'


@dataclasses.dataclass
class RetryRating:
    """Rating whether a retry should be performed."""

    reason: RetryReason
    details: str = ''

    @property
    def message(self) -> str:
        """Return the combined message including optional details."""
        base = self.reason.message
        return f'{base} ({self.details})' if self.details else base


class CachedJob:
    """Cache information about a GitLab job to minimize API calls."""

    def __init__(self, web_url: str) -> None:
        """Initialize a cached job."""
        self.gl_instance, self.gl_job = gitlab.parse_gitlab_url(web_url)
        # https://rednafi.github.io/reflections/dont-wrap-instance-methods-with-functoolslru_cache-decorator-in-python.html
        self.artifact_file = lru_cache()(self._artifact_file)

    @cached_property
    def gl_project(self):
        """Return the GitLab project."""
        return self.gl_instance.projects.get(self.gl_job.pipeline['project_id'])

    @cached_property
    def gl_pipeline(self):
        """Return the GitLab pipeline."""
        return self.gl_project.pipelines.get(self.gl_job.pipeline['id'])

    @cached_property
    def gl_pipeline_jobs(self):
        """Return all jobs of the GitLab pipeline."""
        return self.gl_pipeline.jobs.list(all=True, include_retried=True)

    @cached_property
    def variables(self) -> dict[str, str]:
        """Return the GitLab pipeline variables."""
        return {v.key: v.value for v in self.gl_pipeline.variables.list()}

    @cached_property
    def trace(self) -> list[str]:
        """Return the GitLab job trace."""
        return self.gl_job.trace().decode('utf8', errors='replace').split('\n')

    @cached_property
    def artifacts_meta(self) -> dict[str, str]:
        """Return the artifact meta data."""
        try:
            return json.loads(self.gl_job.artifact('artifacts-meta.json'))
        # pylint: disable=broad-except
        except Exception:
            return {'mode': 'gitlab'}

    def _s3_artifact_file(self, name: str, maximum_artifact_size: int) -> str:
        """Return one artifact file from S3."""
        response = SESSION.get(f'{self.artifacts_meta["s3_url"]}/{name}', stream=True)
        if not response.encoding:
            response.encoding = 'utf8'
        return next(response.iter_content(maximum_artifact_size, decode_unicode=True), '')

    def _gitlab_artifact_file(self, name) -> bytes:
        """Return one artifact file from GitLab."""
        return self.gl_job.artifact(name)

    def _artifact_file(self, name, maximum_artifact_size, *, split=True):
        """Return one artifact file or None/an empty list if not found."""
        try:
            if self.artifacts_meta['mode'] == 'gitlab':
                artifact = self._gitlab_artifact_file(name).decode('utf-8', errors='replace')
            else:
                artifact = self._s3_artifact_file(name, maximum_artifact_size)
        # pylint: disable=broad-except
        except Exception:
            return [] if split else None
        return artifact.split('\n') if split else artifact

    @cached_property
    def auth_user_id(self):
        """Return the user owning the GitLab connection."""
        instance = self.gl_instance
        if not hasattr(instance, 'user'):
            instance.auth()
        return instance.user.id

    def job_name_count(self) -> int:
        """Return the number of jobs in the pipeline with the same name."""
        return len([j for j in self.gl_pipeline_jobs
                    if j.name == self.gl_job.name])

    def retry_delay(self, match_result: 'MatchResult') -> datetime.timedelta:
        """Return the time to wait before a retry."""
        delay_index = self.job_name_count() - 1
        if delay_index >= len(match_result.retry_delays):
            delay_index = -1
        return misc.parse_timedelta(match_result.retry_delays[delay_index])

    def retry_rating(self, matcher: 'MatchResult') -> RetryRating:
        # pylint: disable=too-many-return-statements
        """Check whether a job can be safely retried by the herder."""
        # there should be no newer job with the same name
        newer_jobs = [f'J{j.id}' for j in self.gl_pipeline_jobs
                      if j.name == self.gl_job.name and j.id > self.gl_job.id]
        if newer_jobs:
            return RetryRating(RetryReason.EXTERNALLY_RESTARTED,
                               details=', '.join(newer_jobs))

        # only retry up to a maximum number of times
        if self.job_name_count() > matcher.retry_limit:
            return RetryRating(RetryReason.RETRY_LIMIT_REACHED, details=str(matcher.retry_limit))

        return RetryRating(RetryReason.RETRY_OK)


@dataclasses.dataclass(frozen=True, kw_only=True)
class MatchContext:
    # pylint: disable=too-many-instance-attributes
    """Recursively-built match context.

    Fields are overridden level by level while descending the matcher
    tree in match_contexts().  The first group of fields is copied into
    a MatchResult on a successful match; the remaining fields configure
    the matching itself, where empty values impose no constraint.
    """

    # result fields, shared with MatchResult
    name: str = ''
    description: str = ''
    action: str = 'retry'
    maximum_artifact_size: int = 1_000_000
    retry_delays: abc.Collection[str] = ('5m',)
    retry_limit: int = 3

    # matching criteria; see the corresponding Matcher.check_* methods
    web_url: abc.Collection[str] = dataclasses.field(default_factory=list)
    job_status: abc.Collection[str] = ('failed',)
    job_name: str = ''
    # variable name -> allowed values (regexes; None allows an absent variable)
    variables: dict[str, abc.Collection[str | None]] = dataclasses.field(default_factory=dict)
    # NOTE(review): not referenced anywhere in this file — presumably example
    # log lines documenting the matcher; confirm against consumers
    exemplars: list[str] = dataclasses.field(default_factory=list)
    failure_reason: str = ''
    builtin: str = ''
    messages: abc.Collection[str] = dataclasses.field(default_factory=list)
    # artifact file to search instead of the job trace, if set
    file_name: str = ''
    # only this many trailing lines of the output are searched
    tail_lines: int = 300


@dataclasses.dataclass(frozen=True, kw_only=True)
class MatchResult:
    """Positive match result."""

    name: str
    description: str
    action: str
    maximum_artifact_size: int
    retry_delays: abc.Collection[str]
    retry_limit: int

    @classmethod
    def from_context(cls, context: MatchContext, **kwargs: typing.Any) -> 'MatchResult':
        """Create a result from a context."""
        common_fields = {
            f.name for f in dataclasses.fields(cls)
        } & {
            f.name for f in dataclasses.fields(context)
        }
        return MatchResult(**{k: getattr(context, k) for k in common_fields} | kwargs)


class Matcher:
    """Check whether a context matches a job.

    Every check_* method implements a tri-state result: True/False when
    the corresponding criterion is configured in the context, or None
    when the criterion is absent and therefore imposes no constraint.
    """

    context: MatchContext
    job: CachedJob

    def __init__(self, context: MatchContext, job: CachedJob) -> None:
        """Check whether a context matches a job."""
        self.context = context
        self.job = job

    @staticmethod
    def _check_lines(message: str, lines: list[str]) -> bool:
        """Check lines for a match.

        A message delimited by slashes (/.../) is treated as a regular
        expression searched in the newline-joined lines; any other
        message is a plain per-line substring search.
        """
        if message.startswith('/') and message.endswith('/'):
            return re.search(message[1:-1], '\n'.join(lines)) is not None
        return any(message in line for line in lines)

    def check_web_url(self) -> bool | None:
        """Check that the job URL matches if specified, otherwise None."""
        if web_url := self.context.web_url:
            # prefix match against any of the configured URLs
            return any(self.job.gl_job.web_url.startswith(w) for w in web_url)
        return None

    def check_job_status(self) -> bool | None:
        """Check that the job status matches if specified, otherwise None."""
        if job_status := self.context.job_status:
            return self.job.gl_job.status in job_status
        return None

    def check_job_name(self) -> bool | None:
        """Check that the job name matches if specified, otherwise None."""
        if job_name := self.context.job_name:
            # prefix match on the job name
            return self.job.gl_job.name.startswith(job_name)
        return None

    def _match(self, key: str, values: abc.Collection[str | None]) -> bool:
        """Check one pipeline variable against the allowed values.

        A None among the values means the variable is allowed to be
        absent; strings are regexes that must match the whole value.
        """
        if key not in self.job.variables:
            return None in values
        return any(re.fullmatch(r, self.job.variables[key]) for r in values if r is not None)

    def check_variables(self) -> bool | None:
        """Check that the trigger variables match if specified, otherwise None."""
        if variables := self.context.variables:
            # every configured variable needs to match
            return all(self._match(k, v) for k, v in variables.items())
        return None

    def check_failure_reason(self) -> bool | None:
        """Check that the job failure reason matches if specified, otherwise None."""
        if failure_reason := self.context.failure_reason:
            return self.job.gl_job.attributes.get('failure_reason') == failure_reason
        return None

    def check_messages(self) -> bool | None:
        """Check that the job output matches if specified, otherwise None."""
        if self.context.messages:
            # search a named artifact file if configured, the job trace otherwise
            all_lines = self.job.artifact_file(
                self.context.file_name,
                self.context.maximum_artifact_size,
                split=True,
            ) if self.context.file_name else self.job.trace
            # only consider the configured number of trailing lines
            lines = all_lines[-self.context.tail_lines:]
            return any(self._check_lines(m, lines) for m in self.context.messages)
        return None

    def check_no_trace(self) -> bool | None:
        """Check that the job output was empty if requested, otherwise None."""
        if self.context.builtin == 'no_trace':
            # an empty trace splits into a single empty line
            return self.job.trace == ['']
        return None

    def check_missed_tests(self) -> typing.Literal[False] | MatchResult | None:
        """Check whether kcidb tests were missed if requested, otherwise None.

        Returns a customized MatchResult describing the number of missed
        tests on a match, or False when there is no kcidb data or
        nothing was missed.
        """
        if self.context.builtin == 'missed_tests':
            if not (kcidb_content := self.job.artifact_file(
                    'kcidb_all.json', self.context.maximum_artifact_size, split=False)):
                return False

            # KCIDBFile takes a file path, so stage the content in a temp file
            with NamedTemporaryFile() as tmpfile:
                pathlib.Path(tmpfile.name).write_text(kcidb_content, encoding='utf8')
                kcidbfile = KCIDBFile(tmpfile.name)

            # count tests without a status, or with a forced skip status
            if not (count := len([
                test for test in kcidbfile.data.get('tests', [])
                if not test.get('status') or misc.get_nested_key(test, 'misc/forced_skip_status')
            ])):
                return False

            # custom MatchResult with number of tests
            return MatchResult.from_context(self.context,
                                            description=f'{count} tests that did not run')
        return None

    def check(self) -> MatchResult | None:
        # pylint: disable=too-many-return-statements
        """Check whether a context matches a job.

        Any individual check returning False vetoes the match; None
        results are neutral.  Returns the (possibly customized)
        MatchResult on a match, or None.
        """
        if self.check_web_url() is False:
            return None
        if self.check_job_status() is False:
            return None
        if self.check_job_name() is False:
            return None
        if self.check_variables() is False:
            return None
        if self.check_failure_reason() is False:
            return None
        if self.check_messages() is False:
            return None
        if self.check_no_trace() is False:
            return None
        if (missed_tests_result := self.check_missed_tests()) is False:
            return None

        return missed_tests_result or MatchResult.from_context(self.context)


def match_contexts(
    node: dict[str, typing.Any] | None = None,
    context: MatchContext = MatchContext(),
) -> abc.Iterator[MatchContext]:
    """Recurse into the matchers tree and check.

    Starting at the configuration root, each node's keys override the
    inherited context fields; leaf nodes (without nested matchers) yield
    their accumulated context.
    """
    if node is None:
        node = CONFIG
    # apply any context fields this node overrides
    field_names = {field.name for field in dataclasses.fields(context)}
    overrides = {key: node[key] for key in node.keys() & field_names}
    context = dataclasses.replace(context, **overrides)
    children = node.get('matchers', [])
    if not children:
        # leaf node: this fully-built context is a matcher
        yield context
        return
    for child in children:
        yield from match_contexts(child, context)


def check(job: CachedJob) -> MatchResult | None:
    """Check for a match, returning the first matching result or None."""
    for context in match_contexts():
        if result := Matcher(context, job).check():
            return result
    return None


def notify(job: CachedJob, notification: str, notify_chat: bool) -> None:
    """Send a message to the logs and optionally the chat bot.

    Any exception raised while gathering job details or sending the
    notification is logged and suppressed.
    """
    with misc.only_log_exceptions():
        gl_job = job.gl_job
        gl_pipeline = job.gl_pipeline
        message = (
            f'🤠 <{gl_pipeline.web_url}|P{gl_pipeline.id}> '
            f'<{gl_job.web_url}|J{gl_job.id}> '
            f'{gl_job.name} {gl_job.status}: {notification}'
        )
        LOGGER.info('%s', message)
        if notify_chat:
            chatbot.send_message(message)
